
#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# load packages
library(here)
library(tidyverse)
library(ggpubr)
library(countrycode)
library(ggrepel)
library(ggthemes)
library(modelsummary)
library(fixest)
library(marginaleffects)
library(estimatr)
library(brglm)
library(GJRM)
library(lmtest)
library(sandwich)
library(zoo)



ggplot_theme <- theme(axis.text=element_text(size=16),
                      axis.title=element_text(size=16,face="bold"), 
                      title =element_text(size=16, face='bold'))

legend_size_theme <-   theme(legend.key.size = unit(1, 'cm'), #change legend key size
                             legend.key.height = unit(1, 'cm'), #change legend key height
                             legend.key.width = unit(1, 'cm'), #change legend key width
                             legend.title = element_text(size=16), #change legend title font size
                             legend.text = element_text(size=16)) #change legend text font size 

#### set the working directory, if not using the Rproject #####


#---------------------------------------------------#
#--------------------load data ---------------------#
#---------------------------------------------------#

# 1. Load V-Dem data (version 14)

vdem_full_v14 <- readRDS("data/vdem14/V-Dem-CY-Full+Others-v14.rds")

vdem_subset <- vdem_full_v14 %>%
  dplyr::select(country_name, year, country_text_id, COWcode, country_id, starts_with("v2x_polyarchy"),
                starts_with("v2xca_academ"), starts_with("v2cafres"), starts_with("v2cafexch"), 
                starts_with("v2cainsaut"), starts_with("v2casurv"), 
                starts_with("v2clacfree"), e_pop, e_pop_sd, e_gdppc, e_gdppc_sd,  starts_with("v2x_liberal"), 
                v2x_regime, e_regionpol_6C, e_regionpol, v2canuni, v2caprotac, v2x_veracc, v2x_horacc, v2x_diagacc, 
                v2lgbicam, v2juhcind, v2jureview, v2exl_legitideol, v2exl_legitperf, v2exl_legitratio, 
                v2cacamps,e_boix_regime, v2petersch ) %>%
  filter(year >=1899)

episode_data <- read.csv("data/episode_data/episode_with_uncertainty_interval.csv")

episode_data <- episode_data %>%
  dplyr::select(-c(X, starts_with("v2xca_academ"), country_id, country_name))

summary(episode_data)

ert_data <- read.csv("data/ert_v14.csv")

ert_data <- ert_data %>%
  dplyr::select(-c(X, starts_with("v2x_polyarchy"), v2x_regime, country_id, country_name))

#---------------------------------------------------#
#--------------------merge data --------------------#
#---------------------------------------------------#

vdem_subset <- vdem_subset %>%
  left_join(episode_data, by = c("country_text_id", "year")) %>%
  left_join(ert_data, by = c("country_text_id", "year")) %>%
  group_by(country_text_id) %>%
  mutate(v2xca_academ_lag = lag(v2xca_academ)) 

vdem_subset <- vdem_subset %>%
  group_by(country_text_id) %>%
  mutate(aut_ep_lag_1 = lag(aut_ep), 
         aut_ep_lag_2 = lag(aut_ep, 2),
         aut_ep_lag_3 = lag(aut_ep, 3),
         aut_ep_lag_4 = lag(aut_ep, 4),
         aut_ep_lag_5 = lag(aut_ep, 5),
         aut_ep_lag_1_3 = ifelse(aut_ep_lag_1 == 1 | aut_ep_lag_2 == 1 | aut_ep_lag_3 == 1, 1, 0), 
         aut_ep_lag_1_5 = ifelse(aut_ep_lag_1 == 1 | aut_ep_lag_2 == 1 | aut_ep_lag_3 == 1 | aut_ep_lag_4 == 1 | aut_ep_lag_5 == 1, 1, 0)) %>%
  mutate(aut_ep_lag_1_3 = ifelse(aut_ep_lag_1_3 == 0 & aut_ep == 1, 1, aut_ep_lag_1_3), 
         aut_ep_lag_1_5 = ifelse(aut_ep_lag_1_5 == 0 & aut_ep == 1, 1, aut_ep_lag_1_5)) 

test_data <- vdem_subset %>%
  dplyr::select(country_text_id, year, aut_ep_id, aut_ep, aut_ep_lag_1, aut_ep_lag_1_3, aut_ep_lag_1_5 )

rm(test_data)

################################################################################
################################################################################

epsm_data <- readxl::read_xlsx("data/EPSM dataset/EPSM_dataset_clean.xlsx")

epsm_data <- epsm_data %>%
  dplyr::select(-c(country_name, country_id, project, historical, histname, gapstart1, gapstart2, gapstart3, 
                   gapend1, gapend2, gapend3, COWcode))

table(epsm_data$operate_high)
table(epsm_data$edu_dep)

vdem_subset <- vdem_subset %>%
  left_join(epsm_data, by = c("country_text_id", "year"))

vdem_subset <- vdem_subset %>%
  mutate(edu_dep = ifelse(edu_dep == 1, 0, 1)) # 0 = no centralized education department at national level, 1 = national level education department
table(vdem_subset$edu_dep)

vdem_subset <- vdem_subset %>%
  mutate(operate_high_simple = ifelse(operate_high == 1 | operate_high == 2 , "subnational", 
                                      ifelse(operate_high == 3, "national", 
                                             ifelse(operate_high == 10, "subnational", 
                                                    ifelse(operate_high == 9 | operate_high == 4 | operate_high == 5 | operate_high == 6 | 
                                                             operate_high == 7, "private organization", "Private and Public")))))

table(vdem_subset$operate_high_simple)


#---------------------------------------------------#
#------------ Stock Variable Functions--------------#
#---------------------------------------------------#


decay_sum <- function(tm, vl, decay) {
  last_time <- 0
  current_sum <- 0
  sums <- numeric(length(vl))
  ldecay <- (1-decay)
  for (i in 1:length(vl)) {
    delta <- as.numeric(tm[i] - last_time)
    current_sum <- current_sum * (ldecay * delta) + vl[i]
    last_time <- tm[i]
    sums[i] <- current_sum
  }
  sums
}

annual_depricition_function  <- function(year, var, decay) {
  last_time <- first(year)
  current_sum <- 0
  sums <- numeric(length(var))
  ldecay <- (1-decay)
  for (i in 1:length(var)) {
    delta <- as.numeric(year[i] - last_time)
    current_sum <- ldecay * (current_sum + decay * var[i])
    last_time <- year[i]
    sums[i] <- current_sum
  }
  sums
}


## Functions ##

na_interpolation2 <- function(x, option = "linear", maxgap) {
  library(imputeTS)
  library(dplyr)
  
  total_not_missing <- sum(!is.na(x))
  
  # check there is sufficient data for na_interpolation 
  if(total_not_missing < 2) {x} 
  
  else
    
    # replace takes an input vector, a T/F vector & replacement value
  {replace(
    # input vector is interpolated data
    # this will impute leading/lagging NAs which we don't want 
    imputeTS::na_interpolation(x, option = option, maxgap = maxgap), 
    
    # create T/F vector for NAs,  
    is.na(na.approx(x, na.rm = FALSE)), 
    
    # replace TRUE with NA in input vector  
    NA) 
  }
}

vdem_subset <- vdem_subset %>%
  group_by(country_id) %>%
  mutate(v2xca_academ_interpolated = na_interpolation2(v2xca_academ, option = "linear", maxgap = 5)) %>%
  drop_na(v2xca_academ_interpolated) %>%
  mutate(v2xca_academ_stock = annual_depricition_function(year, v2xca_academ_interpolated, 0.02)) %>%
  dplyr::select(country_id, country_name, year, v2xca_academ, v2xca_academ_stock, everything())


vdem_subset <- vdem_subset %>%
  group_by(country_id) %>%
  fill(e_gdppc,e_pop, .direction = "down")

#### Data Wrangling ####

vdem_subset_v14 <- vdem_subset %>%
  group_by(e_regionpol, year) %>%
  mutate(regionpol_EDI = mean(v2x_polyarchy, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(country_id) %>%
  mutate(e_gdppc_growth = ((e_gdppc - dplyr::lag(e_gdppc))/dplyr::lag(e_gdppc))*100) %>%
  mutate(e_gdppc_growth_roll5 = rollmean(e_gdppc_growth, k = 5, fill = NA)) %>%
  ungroup() %>%
  group_by(decline_episode_id) %>%
  mutate(decline_episode_duration =  year- first(year), 
         decline_episode_duration = ifelse(is.na(decline_episode_id), 0, decline_episode_duration)) %>%
  group_by(country_id) %>%
  mutate(group_id = cumsum(decline_episode != lag(decline_episode, default = FALSE))) %>% # group_id for each autocra_period
  group_by(country_id, group_id) %>%
  mutate(spell = year-first(year), 
         spell = ifelse(is.na(aut_ep_id), spell, 0))

vdem_subset_v14 <- vdem_subset_v14 %>%
  distinct(country_text_id, year, .keep_all = T)

summary(vdem_subset_v14$e_pop)
summary(vdem_subset_v14$e_gdppc)

vdem_subset_v14 <- vdem_subset_v14 %>%
  mutate(e_pop_log = log(e_pop))

summary(vdem_subset_v14$e_pop_log)

## lags for independent variables ##

vdem_subset_v14_noNA <- vdem_subset_v14 %>%
  mutate(year_1900 = year -1900, 
         v2lgbicam = ifelse(v2lgbicam >=1, 1, 0), 
         v2caprotac = ifelse(v2caprotac == 95, 0, 
                             ifelse(v2caprotac == 97, 0, 
                                    ifelse(v2caprotac == 99, 0, v2caprotac)))) %>%
  group_by(country_id) %>%
  mutate(v2xca_academ_stock_lag = lag(v2xca_academ_stock), 
         operate_high_simple_lag = lag(operate_high_simple), 
         edu_dep_lag = lag(edu_dep),
         e_gdppc_lag = lag(e_gdppc), 
         e_gdppc_growth_roll5_lag = lag(e_gdppc_growth_roll5), 
         regionpol_EDI_lag = lag(regionpol_EDI), 
         e_pop_log_lag = lag(e_pop_log), 
         v2canuni_lag = lag(log(v2canuni+1)), 
         v2caprotac_lag = lag(v2caprotac), 
         v2x_veracc_lag = lag(v2x_veracc), 
         v2x_diagacc_lag = lag(v2x_diagacc), 
         v2x_horacc_lag = lag(v2x_horacc), 
         v2lgbicam_lag = lag(v2lgbicam), 
         v2juhcind_lag = lag(v2juhcind), 
         v2jureview_lag = lag(v2jureview), 
         v2exl_legitideol_lag = lag(v2exl_legitideol), 
         v2exl_legitperf_lag = lag(v2exl_legitperf), 
         v2exl_legitratio_lag = lag(v2exl_legitratio), 
         v2cacamps_lag = lag(v2cacamps), 
         e_boix_regime_lag = lag(e_boix_regime), 
         v2petersch_lag = lag(v2petersch)      
  )

vdem_subset_v14_noNA <- vdem_subset_v14_noNA %>%
  mutate(region_factor = as.factor(e_regionpol_6C),
         region_factor = relevel(region_factor, ref=1), 
         spell = spell/100,
         spell_sq = spell^2/100,
         spell_cb = spell^3/100,
         year_sq = year_1900^2)

summary(vdem_subset_v14_noNA$v2caprotac_lag)
table(vdem_subset_v14_noNA$v2caprotac_lag)

vdem_subset_v14_noNA <- vdem_subset_v14_noNA %>%
  group_by(country_id) %>%
  fill(e_gdppc_growth_roll5_lag, .direction = "down")
  

## drop NA ## 
vdem_subset_v14_noNA <- vdem_subset_v14_noNA %>%
  filter(v2x_regime >= 2) %>%
  group_by(country_text_id) %>%
  drop_na(e_gdppc, e_gdppc_growth_roll5_lag, regionpol_EDI_lag, 
          e_pop_log, region_factor, year_1900, spell, country_id) 

summary(vdem_subset_v14_noNA)
length(unique(vdem_subset_v14_noNA$country_name)) # 115 countries 
min(vdem_subset_v14_noNA$year)
max(vdem_subset_v14_noNA$year)

vdem_subset_v14_noNA_test <- vdem_subset_v14_noNA %>%
  dplyr::select(country_name, country_text_id, year, decline_episode, e_gdppc, e_gdppc_growth_roll5_lag, regionpol_EDI_lag, 
                e_pop_log, region_factor, year_1900, spell, country_id)


################################################################################
################################################################################

#---------------------------------------------------#
#------------------ Table 3 ------------------------#
#---------------------------------------------------#

## setting ongoing events to NA as recommend by McGrath (2015)

vdem_subset_v14_noNA <- vdem_subset_v14_noNA %>%
  group_by(country_id) %>%
  mutate(decline_episode_onset = ifelse(decline_episode==1 & lag(decline_episode)== 1, NA, decline_episode)) %>%
  dplyr::select(country_id, year, aut_ep, decline_episode_onset, everything()) %>%
  drop_na(decline_episode_onset)

summary(vdem_subset_v14_noNA$decline_episode_onset) # 0.7% of all country-years
table(vdem_subset_v14_noNA$decline_episode_onset) # 100 AF decline episodes onsets since 1900

vdem_subset_v14_noNA <- vdem_subset_v14_noNA %>%
  mutate(stage1_res = ifelse(decline_episode == 0, 1, 0))

summary(vdem_subset_v14_noNA$stage1_res) # 99.3% of all country-years
table(vdem_subset_v14_noNA$stage1_res) # 99.3% of all country-years

# only 37 onsets of academic freedom decline episodes in democracies 

#### Descriptive analysis ####

# number of onsets before 2000? 18
sum(vdem_subset_v14_noNA$decline_episode_onset[vdem_subset_v14_noNA$year < 2000], na.rm = T)

# number of onsets after 2000? 19
sum(vdem_subset_v14_noNA$decline_episode_onset[vdem_subset_v14_noNA$year >= 2000], na.rm = T)

# number of country-years that were at risk in total? 4830
sum(length(vdem_subset_v14_noNA$year))

# how many country years were at risk before 2000? 2692
sum(length(vdem_subset_v14_noNA$year[vdem_subset_v14_noNA$year < 2000]))

# how many country years were at risk after 2000? 2138
sum(length(vdem_subset_v14_noNA$year[vdem_subset_v14_noNA$year >= 2000]))


# percentage of resilient countries total: 99.27%
((sum(length(vdem_subset_v14_noNA$year)) - sum(vdem_subset_v14_noNA$decline_episode_onset, na.rm = T)) / sum(length(vdem_subset_v14_noNA$year)))*100 

# percentage of resilient countries before 2000: 99.33%
((sum(length(vdem_subset_v14_noNA$year[vdem_subset_v14_noNA$year < 2000])) - sum(vdem_subset_v14_noNA$decline_episode_onset[vdem_subset_v14_noNA$year < 2000], na.rm = T)) / sum(length(vdem_subset_v14_noNA$year[vdem_subset_v14_noNA$year < 2000])))*100 

# percentage of resilient countries after 2000: 99.11%
((sum(length(vdem_subset_v14_noNA$year[vdem_subset_v14_noNA$year >= 2000])) - sum(vdem_subset_v14_noNA$decline_episode_onset[vdem_subset_v14_noNA$year >= 2000], na.rm = T)) / sum(length(vdem_subset_v14_noNA$year[vdem_subset_v14_noNA$year >= 2000])))*100 


## List of onsets ##

list_of_onsets <- vdem_subset_v14_noNA %>% 
  filter(stage1_res == 0) %>%
  dplyr::select(country_name, year, stage1_res, aut_ep_lag_1)

length(unique(vdem_subset_v14_noNA$country_name)) # number of countries 


##################################################################################
##################################################################################

#---------------------------------------------------#
#------------------ Table 4 ------------------------#
#---------------------------------------------------#

#### Linear Models with Different Autocratization Measures #####

# Run model 1
onset_m1 <- brglm::brglm(as.factor(stage1_res) ~ aut_ep_lag_1 +  
                           e_gdppc_lag + e_gdppc_growth_roll5_lag +  e_pop_log_lag +
                           region_factor + year_1900 + year_sq, 
                         data = vdem_subset_v14_noNA, family = binomial(link = "probit"),
                         method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                         pl=T)
summary(onset_m1)

#Clustered SEs
ses_ons1_cl <-  coeftest(onset_m1, vcov = vcovCL(onset_m1, cluster = vdem_subset_v14_noNA[, "country_id"]),
                         type = "HC1", fix = T, cadjust = T)


# Run model 2
onset_m2 <- brglm::brglm(as.factor(stage1_res) ~ aut_ep_lag_1_3 + 
                           e_gdppc_lag + e_gdppc_growth_roll5_lag +  e_pop_log_lag +
                           region_factor + year_1900 + year_sq, 
                         data = vdem_subset_v14_noNA, family = binomial(link = "probit"),
                         method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                         pl=T)
summary(onset_m2)

#Clustered SEs
ses_ons2_cl <-  coeftest(onset_m2, vcov = vcovCL(onset_m2, cluster = vdem_subset_v14_noNA[, "country_id"]),
                         type = "HC1", fix = T, cadjust = T)

# Run model 3
onset_m3 <- brglm::brglm(as.factor(stage1_res) ~ aut_ep_lag_1_5 + 
                           e_gdppc_lag + e_gdppc_growth_roll5_lag +  e_pop_log_lag +
                           region_factor + year_1900 + year_sq, 
                         data = vdem_subset_v14_noNA, family = binomial(link = "probit"),
                         method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                         pl=T)
summary(onset_m3)

#Clustered SEs
ses_ons3_cl <-  coeftest(onset_m3, vcov = vcovCL(onset_m3, cluster = vdem_subset_v14_noNA[, "country_id"]),
                         type = "HC1", fix = T, cadjust = T)


## Extract models ##
texreg::wordreg(list(onset_m1, onset_m2, onset_m3), 
                override.pvalues=list(ses_ons1_cl[, 4], ses_ons2_cl[, 4], ses_ons3_cl[, 4]),
                override.se= list(ses_ons1_cl[, 2], ses_ons2_cl[, 2], ses_ons3_cl[, 2]), 
                file = "outputs/Table_4_Regression_Autocratization_Democracies.docx", 
                custom.coef.names=c("Intercept", 
                                    "Autocratization Episode t-1", 
                                    "GDP pc log",
                                    "GDP growth",
                                    "Population log",
                                    "Western Europe and North America",
                                    "Subsaharan Africa",
                                    "Asia and Pacific", 
                                    "Eastern Europe and Central Asia",
                                    "MENA", 
                                    "Year",
                                    "Year squared",
                                    "Autocratization Episode t-1 to t-3", 
                                    "Autocratization Episode t-1 to t-5")
)



##################################################################################
##################################################################################

#---------------------------------------------------#
#------------------ Figure 2 -----------------------#
#---------------------------------------------------#

model_1 <-  sjPlot::plot_model(onset_m1, type = "eff",
                               terms = c("aut_ep_lag_1[0,1]"),
                               vcov.fun = "vcovCL", 
                               vcov.type = "HC1",
                               vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                               alpha = 0.3) 
model_1_data  <- model_1$data

model_1 <- ggplot(model_1_data, aes(x = as.character(x), y = predicted, label = round(predicted,3))) +
  geom_point() +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) + 
  geom_text(nudge_x = 0.1) +
  theme_bw() + 
  scale_y_continuous("P(AF resilience)", limits = c(0.9,1.01), breaks = seq(0.9,1, 0.02)) +
  scale_x_discrete("", labels = c(
    "0" = "No autocratization episode",
    "1" = "Autocratization episode")) +
  coord_flip() + ggtitle("Model 1")

model_2 <-  sjPlot::plot_model(onset_m2, type = "eff",
                               terms = c("aut_ep_lag_1_3[0,1]"),
                               vcov.fun = "vcovCL", 
                               vcov.type = "HC1",
                               vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                               alpha = 0.3) 
model_2_data  <- model_2$data

model_2 <- ggplot(model_2_data, aes(x = as.character(x), y = predicted, label = round(predicted,3))) +
  geom_point() +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) + 
  geom_text(nudge_x = 0.1) +
  theme_bw() + 
  scale_y_continuous("P(AF resilience)", limits = c(0.90,1.01), breaks = seq(0.90,1, 0.02)) +
  scale_x_discrete("", labels = c(
    "0" = "No autocratization episode",
    "1" = "Autocratization episode")) +
  coord_flip()  + ggtitle("Model 2")

model_3 <-  sjPlot::plot_model(onset_m3, type = "eff",
                               terms = c("aut_ep_lag_1_5[0,1]"),
                               vcov.fun = "vcovCL", 
                               vcov.type = "HC1",
                               vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                               alpha = 0.3) 
model_3_data  <- model_3$data

model_3 <- ggplot(model_3_data, aes(x = as.character(x), y = predicted, label = round(predicted,3))) +
  geom_point() +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) + 
  geom_text(nudge_x = 0.1) +
  theme_bw() + 
  scale_y_continuous("P(AF resilience)", limits = c(0.9,1.01), breaks = seq(0.9,1, 0.02)) +
  scale_x_discrete("", labels = c(
    "0" = "No autocratization episode",
    "1" = "Autocratization episode")) +
  coord_flip()  + ggtitle("Model 3")

remove_y <- theme(
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank(),
  axis.title.y = element_blank()
)

library(patchwork)

wrap_plots(model_1, model_2 + remove_y , model_3 + remove_y, nrow = 1) + plot_layout(guides = "collect")

ggsave("outputs/Figure_2_Predicted_Probabilites_Regression_1_Democracies.png", width = 20,
       height = 10,
       units = c("cm"), dpi = 600)


################################################################################
################################################################################

#---------------------------------------------------#
#------------------ Table 5 ------------------------#
#---------------------------------------------------#


# Run model 4 with university number of constituional proctection 
onset_m4 <-  brglm::brglm(as.factor(stage1_res) ~ aut_ep_lag_1_3 +
                            e_gdppc_lag + e_gdppc_growth_roll5_lag +  e_pop_log_lag +
                            v2canuni_lag + v2caprotac_lag + v2jureview_lag +
                            region_factor + year_1900 + year_sq, 
                          data = vdem_subset_v14_noNA, family = binomial(link = "probit"),
                          method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                          pl=T)
summary(onset_m4)

#Clustered SEs
ses_ons4_cl <-  coeftest(onset_m4, vcov = vcovCL(onset_m4, vdem_subset_v14_noNA[, "country_id"]),
                         type = "HC1", fix = T, cadjust = T)

# Run model 5 with tertiary enrollment rates 
onset_m5 <-  brglm::brglm(as.factor(stage1_res) ~ aut_ep_lag_1_3 +
                            e_gdppc_lag + e_gdppc_growth_roll5_lag +  e_pop_log_lag +
                            v2canuni_lag + v2caprotac_lag + v2jureview_lag + v2petersch_lag +
                            region_factor + year_1900 + year_sq, 
                          data = vdem_subset_v14_noNA, family = binomial(link = "probit"),
                          method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                          pl=T)
summary(onset_m5)

length(table(vdem_subset_v14_noNA$country_name[!is.na(vdem_subset_v14_noNA$v2petersch_lag)]))
#Clustered SEs
ses_ons5_cl <-  coeftest(onset_m5, vcov = vcovCL(onset_m5, vdem_subset_v14_noNA[, "country_id"]),
                         type = "HC1", fix = T, cadjust = T)

# Run model 6 with edcuation system variables
onset_m6 <-  brglm::brglm(as.factor(stage1_res) ~ aut_ep_lag_1_3 +
                            e_gdppc_lag + e_gdppc_growth_roll5_lag +  e_pop_log_lag +
                            operate_high_simple_lag + edu_dep_lag + 
                            region_factor + year_1900 + year_sq, 
                          data = vdem_subset_v14_noNA, family = binomial(link = "probit"),
                          method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                          pl=T)
summary(onset_m6)

#Clustered SEs
ses_ons6_cl <-  coeftest(onset_m6, vcov = vcovCL(onset_m6, vdem_subset_v14_noNA[, "country_id"]),
                         type = "HC1", fix = T, cadjust = T)


# Run model 7 with political polarization 
onset_m7 <-  brglm::brglm(as.factor(stage1_res) ~ aut_ep_lag_1_3 +
                            e_gdppc_lag + e_gdppc_growth_roll5_lag +  e_pop_log_lag +
                            v2cacamps_lag + 
                            region_factor + year_1900 + year_sq, 
                          data = vdem_subset_v14_noNA, family = binomial(link = "probit"),
                          method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                          pl=T)
summary(onset_m7)

#Clustered SEs
ses_ons7_cl <-  coeftest(onset_m7, vcov = vcovCL(onset_m7, vdem_subset_v14_noNA[, "country_id"]),
                         type = "HC1", fix = T, cadjust = T)

# Run model 8
onset_m8 <-  brglm::brglm(as.factor(stage1_res) ~ aut_ep_lag_1_3 +
                            e_gdppc_lag + e_gdppc_growth_roll5_lag +  e_pop_log_lag +
                            v2x_veracc_lag + v2x_horacc_lag + v2x_diagacc_lag +  
                            region_factor + year_1900 + year_sq, 
                          data = vdem_subset_v14_noNA, family = binomial(link = "probit"),
                          method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                          pl=T)
summary(onset_m8)

#Clustered SEs
ses_ons8_cl <-  coeftest(onset_m8, vcov = vcovCL(onset_m8, vdem_subset_v14_noNA[, "country_id"]),
                         type = "HC1", fix = T, cadjust = T)


# Run model 9
onset_m9 <-  brglm::brglm(as.factor(stage1_res) ~ aut_ep_lag_1_3 +
                            e_gdppc_lag + e_gdppc_growth_roll5_lag +  e_pop_log_lag +
                            v2xca_academ_stock_lag +
                            region_factor + year_1900 + year_sq, 
                          data = vdem_subset_v14_noNA, family = binomial(link = "probit"),
                          method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                          pl=T)
summary(onset_m9)

#Clustered SEs
ses_ons9_cl <-  coeftest(onset_m9, vcov = vcovCL(onset_m9, vdem_subset_v14_noNA[, "country_id"]),
                         type = "HC1", fix = T, cadjust = T)


# Run model 10 with all candidate variable expect tertiary enrollment rates 
onset_m10 <-  brglm::brglm(as.factor(stage1_res) ~ aut_ep_lag_1_3 +
                            e_gdppc_lag + e_gdppc_growth_roll5_lag +  e_pop_log_lag +
                            v2canuni_lag + v2caprotac_lag + v2jureview_lag +
                             operate_high_simple_lag + edu_dep_lag + 
                            v2cacamps_lag + 
                            v2x_veracc_lag + v2x_horacc_lag + v2x_diagacc_lag +  
                             v2xca_academ_stock_lag + 
                            region_factor + year_1900 + year_sq, 
                          data = vdem_subset_v14_noNA, family = binomial(link = "probit"),
                          method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                          pl=T)
summary(onset_m10)

#Clustered SEs
ses_ons10_cl <-  coeftest(onset_m10, vcov = vcovCL(onset_m10, vdem_subset_v14_noNA[, "country_id"]),
                         type = "HC1", fix = T, cadjust = T)


#################################################
## Extract models ##
#################################################

texreg::wordreg(list(onset_m4, onset_m5, onset_m6, onset_m7, onset_m8, onset_m9, onset_m10), 
                override.pvalues=list(ses_ons4_cl[, 4], ses_ons5_cl[, 4], ses_ons6_cl[, 4], ses_ons7_cl[, 4], ses_ons8_cl[, 4], ses_ons9_cl[, 4], ses_ons10_cl[, 4]),
                override.se= list(ses_ons4_cl[, 2], ses_ons5_cl[, 2], ses_ons6_cl[, 2], ses_ons7_cl[, 2], ses_ons8_cl[, 2], ses_ons9_cl[, 2], ses_ons10_cl[, 2]), 
                file = "outputs/Table_5_Regression_Other_Factors_Democracies.docx", 
                custom.coef.names=c("Intercept", 
                                    "Autocratization Episode t-1 to t-3", 
                                    "GDP pc log",
                                    "GDP growth",
                                    "Population log",
                                    "Number of Universities", 
                                    "Constitutional proctection of AF", 
                                    "Judicial review", 
                                    "Western Europe and North America",
                                    "Subsaharan Africa",
                                    "Asia and Pacific", 
                                    "Eastern Europe and Central Asia",
                                    "MENA", 
                                    "Year",
                                    "Year squared",
                                    "Teritary Education Enrollment", 
                                    "University Operation: Private and Public", 
                                    "University Operation: Private", 
                                    "University Operation: Subnational", 
                                    "National-level Education Department", 
                                    "Political Polarization", 
                                    "Vertical Accounability", 
                                    "Horizontal Accountability", 
                                    "Diagonal Accounability", 
                                    "Academic Freedom Stock")
)

################################################################################
################################################################################

#---------------------------------------------------#
#------------------ Figure 3 -----------------------#
#---------------------------------------------------#


university_number <-  sjPlot::plot_model(onset_m4, type = "eff",
                                         terms = c("v2canuni_lag[all]"),
                                         vcov.fun = "vcovCL", 
                                         vcov.type = "HC1",
                                         vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                                         alpha = 0.3) +
  ylab("P(AF resilience)") + xlab("Number of Universties (natural log)") + theme_bw() + 
  scale_y_continuous(limits = c(0.9,1), breaks = seq(0.9,1,0.02)) +
  #scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + 
  ggtitle("") +
  geom_rug(data = vdem_subset_v14_noNA, aes(x = v2canuni_lag, y = 0), alpha = .1, position = "jitter", sides = "b")


const_protection <-  sjPlot::plot_model(onset_m4, type = "eff",
                                        terms = c("v2caprotac_lag[0,1]"),
                                        vcov.fun = "vcovCL", 
                                        vcov.type = "HC1",
                                        vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                                        alpha = 0.3) 


const_protection_data <- const_protection$data

const_protection <- ggplot(const_protection_data, aes(x = as.character(x), y = predicted, label = round(predicted,3))) +
  geom_point() +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) + 
  geom_text(nudge_x = 0.25) +
  theme_bw() + 
  scale_y_continuous("P(AF resilience)", limits = c(0.96,1.002), breaks = seq(0.96,1,0.02)) +
  scale_x_discrete("", labels = c(
    "0" = "No constitutional protection of AF",
    "1" = "Constitutional protection of AF")) +
  coord_flip()


judicial_review <-  sjPlot::plot_model(onset_m4, type = "eff",
                                       terms = c("v2jureview_lag[all]"),
                                       vcov.fun = "vcovCL", 
                                       vcov.type = "HC1",
                                       vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                                       alpha = 0.3) +
  ylab("P(AF resilience)") + xlab("Judicial review") + theme_bw() + 
  scale_y_continuous(limits = c(0.9,1), breaks = seq(0.9,1,0.02)) +
  #scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + 
  ggtitle("") +
  geom_rug(data = vdem_subset_v14_noNA, aes(x = v2jureview_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

edu_department <-  sjPlot::plot_model(onset_m6, type = "eff",
                                        terms = c("edu_dep_lag[0,1]"),
                                        vcov.fun = "vcovCL", 
                                        vcov.type = "HC1",
                                        vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                                        alpha = 0.3) 


edu_department_data <- edu_department$data

edu_department <- ggplot(edu_department_data, aes(x = as.character(x), y = predicted, label = round(predicted,3))) +
  geom_point() +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) + 
  geom_text(nudge_x = 0.25) +
  theme_bw() + 
  scale_y_continuous("P(AF resilience)", limits = c(0.96,1.002), breaks = seq(0.96,1,0.02)) +
  scale_x_discrete("", labels = c(
    "0" = "No national-level edcaution department",
    "1" = "National-level edcaution department")) +
  coord_flip()

operate_high_simple <-  sjPlot::plot_model(onset_m6, type = "eff",
                                      terms = c("operate_high_simple_lag"),
                                      vcov.fun = "vcovCL", 
                                      vcov.type = "HC1",
                                      vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                                      alpha = 0.3) 


operate_high_simple_data <- operate_high_simple$data

operate_high_simple <- ggplot(operate_high_simple_data, aes(x = as.character(x), y = predicted, label = round(predicted,3))) +
  geom_point() +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) + 
  geom_text(nudge_x = 0.25) +
  theme_bw() + 
  scale_y_continuous("P(AF resilience)", limits = c(0.96,1), breaks = seq(0.96,1,0.02)) +
  scale_x_discrete("Unversity Operation", labels = c(
    "1" = "National",
    "2" = "Private and public", 
    "3" = "Private", 
    "4" = "Subnational")) +
  coord_flip()


wrap_plots((university_number | const_protection) / (judicial_review | edu_department) / 
             operate_high_simple) + plot_layout(guides = "collect")


ggsave("outputs/Figure_3_Predicted_Probabilites_Regression_2_Other_factors_Democracies.png", width = 20,
       height = 20,
       units = c("cm"), dpi = 600)

################################################################################
################################################################################

#---------------------------------------------------#
#------------------ Figure 4------------------------#
#---------------------------------------------------#

polarization <-  sjPlot::plot_model(onset_m7, type = "eff",
                                    terms = c("v2cacamps_lag[all]"),
                                    vcov.fun = "vcovCL", 
                                    vcov.type = "HC1",
                                    vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                                    alpha = 0.3) +
  ylab("P(AF resilience)") + xlab("Political Polarization") + theme_bw() + 
  scale_y_continuous(limits = c(0.92,1), breaks = seq(0.92,1,0.02)) +
  #scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + 
  ggtitle("") +
  geom_rug(data = vdem_subset_v14_noNA, aes(x = v2cacamps_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

vert_acc <-  sjPlot::plot_model(onset_m8, type = "eff",
                                terms = c("v2x_veracc_lag[all]"),
                                vcov.fun = "vcovCL", 
                                vcov.type = "HC1",
                                vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                                alpha = 0.3) +
  ylab("P(AF resilience)") + xlab("Vertical Accountability") + theme_bw() + 
  scale_y_continuous(limits = c(0.65,1), breaks = seq(0.65,1,0.02)) +
  #scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + 
  ggtitle("") +
  geom_rug(data = vdem_subset_v14_noNA, aes(x = v2x_veracc_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

hori_acc <-  sjPlot::plot_model(onset_m8, type = "eff",
                                terms = c("v2x_horacc_lag[all]"),
                                vcov.fun = "vcovCL", 
                                vcov.type = "HC1",
                                vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                                alpha = 0.3) +
  ylab("P(AF resilience)") + xlab("Horizontal Accountability") + theme_bw() + 
  scale_y_continuous(limits = c(0.65,1), breaks = seq(0.65,1,0.02)) +
  #scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + 
  ggtitle("") +
  geom_rug(data = vdem_subset_v14_noNA, aes(x = v2x_horacc_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

dia_acc <-  sjPlot::plot_model(onset_m8, type = "eff",
                               terms = c("v2x_diagacc_lag[all]"),
                               vcov.fun = "vcovCL", 
                               vcov.type = "HC1",
                               vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                               alpha = 0.3) +
  ylab("P(AF resilience)") + xlab("Diagonal Accountability") + theme_bw() + 
  scale_y_continuous(limits = c(0.65,1), breaks = seq(0.65,1,0.02)) +
  #scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + 
  ggtitle("") +
  geom_rug(data = vdem_subset_v14_noNA, aes(x = v2x_diagacc_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

v2xca_academ_stock_lag <-  sjPlot::plot_model(onset_m9, type = "eff",
                                         terms = c("v2xca_academ_stock_lag[all]"),
                                         vcov.fun = "vcovCL", 
                                         vcov.type = "HC1",
                                         vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                                         alpha = 0.3) +
  ylab("P(AF resilience)") + xlab("Academic Freedom Stock") + theme_bw() + 
  scale_y_continuous(limits = c(0.92,1), breaks = seq(0.92,1,0.02)) +
  #scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + 
  ggtitle("") +
  geom_rug(data = vdem_subset_v14_noNA, aes(x = v2xca_academ_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

wrap_plots((v2xca_academ_stock_lag | polarization) / 
             (vert_acc | hori_acc | dia_acc)) + plot_layout(guides = "collect")

ggsave("outputs/Figure_4_Predicted_Probabilites_Regression_2_Other_factors_Democracies.png", width = 20,
       height = 20,
       units = c("cm"), dpi = 600)

################################################################################
################################################################################

#---------------------------------------------------#
#------------------ Figure 5 -----------------------#
#---------------------------------------------------#

# Run model 4 with university number of constitutional protection 
onset_m4b <-  brglm::brglm(as.factor(stage1_res) ~ aut_ep_lag_1_3 + v2caprotac_lag + v2caprotac_lag*aut_ep_lag_1_3 +
                            e_gdppc_lag + e_gdppc_growth_roll5_lag +  e_pop_log_lag +
                            v2canuni_lag + v2jureview_lag +
                            region_factor + year_1900 + year_sq, 
                          data = vdem_subset_v14_noNA, family = binomial(link = "probit"),
                          method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                          pl=T)
summary(onset_m4b)

#Clustered SEs
ses_ons4b_cl <-  coeftest(onset_m4b, vcov = vcovCL(onset_m4, vdem_subset_v14_noNA[, "country_id"]),
                         type = "HC1", fix = T, cadjust = T)

const_protection <-  sjPlot::plot_model(onset_m4, type = "eff",
                                        terms = c("v2caprotac_lag[0,1]", "aut_ep_lag_1_3[0,1]"),
                                        vcov.fun = "vcovCL", 
                                        vcov.type = "HC1",
                                        vcov.args = "cluster = vdem_subset_v14_noNA$country_id",
                                        alpha = 0.3) 


const_protection_data <- const_protection$data

const_protection_data <- const_protection_data %>%
  mutate(group = ifelse(group == 0, "No Autocratization Episode", "Autocratiaztion Episode")) 

const_protection_data <- const_protection_data[1:4,]

const_protection <- ggplot(const_protection_data, aes(x = as.character(x), color = as.character(group), y = predicted, label = round(predicted,3))) +
  geom_point() +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) + 
  geom_text(nudge_x = 0.1) +
  theme_bw() + 
  scale_y_continuous("Predicted Probability", limits = c(0.84,1.01), breaks = seq(0.84,1,0.02)) +
  scale_x_discrete("", labels = c(
    "0" = "No constitutional protection of AF",
    "1" = "Constitutional protection of AF")) +
  coord_flip() +
  scale_color_colorblind() +
  labs(color = "") +
  guides(color = guide_legend(override.aes = list(size =0.9)), 
         text = "none")

const_protection
ggsave("outputs/Figure_5_Predicted_Probabilites_Auto_ConstProtec.png", width = 20,
       height = 10,
       units = c("cm"), dpi = 600)
